Set up

First we read in data, which has been pre-processed in a different R script.

here::i_am('pilot_pp_feb2024_models.Rmd')
#pilotdata <- read.csv("pilot_pp_feb2024_data_cleaned copy.csv") %>%
pilotdata <- read.csv("pilot_pp_feb2024_data_cleaned_addTiming.csv") %>%
  # TODO : REPLACE WITH ACTUAL CALCULATIONS USING BIRTHDAY + DATE OF TEST
  mutate(child_age_days = child__age_in_days,
         child_age_months = floor(child__age_in_days / 30),
         child_age_years = child_age_months / 12,
         child_age_years_group = floor(child_age_years),
         child_gender = child__gender,
         timeToFirstIdea = as.integer(timeToFirstIdea)) %>%
  select(!starts_with("child__")) %>% # unselect any other identifying variables
  filter(!response_uuid %in% c("6e60e04e-e5c6-4fdf-b562-19777c2bf416",
                               "756e9d1a-1bcf-442c-8c9e-c09a4218c2f9",
                               "d4f5df0f-2525-4101-8831-93d364177093", # generate video couldn't load
                               "52d1028f-5236-40c3-86f3-f5a4f95a9521", # generate incomplete
                               "54d6378d-d0e6-4f37-a0a7-7028ac6380b2", # generate no response
                               "1268dff5-832e-4443-98b4-d5cb4e61ccb6" # 9-year-old
                               )) %>%
  select(-nideas, -time) %>%
  rename(time=time_junyi, nideas=nideas.1) %>%
  mutate(timeToFirstIdea = timeToFirstIdea - introAudioLength)
  
  
pilotkids <- pilotdata %>% select(response_uuid, child_hashed_id, child_age_days:child_gender) %>%
  unique()

# print a few random lines
pilotdata %>% group_by(condition) %>% 
  slice_sample(n=4) %>%
  kable()
joinID response_uuid child_hashed_id condition trialnumber scene images.1.id images.2.id chosen_object response_side object_match match_binary setID scene_binary chosen_object_binary participant__global_id participant__hashed_id participant__nickname child_notes transcript ideas_junyi time nideas video_id generate_object frame_num expected_frame_duration trial_num introAudioLength timeToFirstIdea codernotes child_age_days child_age_months child_age_years child_age_years_group child_gender
EAPGHJ-choose-shoppers-yellowball_left 1a9d6e02-d813-47c8-96c2-c789fe6e24ea EAPGHJ choose 1 shoppers yellowball_left hanger_right hanger right yellowball FALSE set8 1 1 23a9ee51-e0be-4122-b678-c376b940b75d 37BBXF Sha NA NA NA NA NA NA NA NA NA NA NA NA NA 2327 77 6.42 6 m
DVDXL2-choose-magicians-sponge_left b7d96c3a-9b1a-4209-b74f-4229b0c2ff18 DVDXL2 choose 7 magicians sponge_left featherduster_right featherduster right featherduster TRUE set7 1 1 12f31217-a401-4062-be34-7d30a1a73285 474P7M Tdtt NA NA NA NA NA NA NA NA NA NA NA NA 1940 64 5.33 5 f
CFSTFX-choose-campers-blanket_left 98506dba-bad0-4b30-b3db-197dd422f8c9 CFSTFX choose 3 campers blanket_left helmet_right blanket left blanket TRUE set2 1 1 d6133ccb-7330-40b5-839d-710492ff610c EMT3KG Emily NA NA NA NA NA NA NA NA NA NA NA NA NA 2021 67 5.58 5 f
JWcSDV-choose-shoppers-hanger_left a8c206f6-69ce-4f1c-bf57-74d812c3930a JWcSDV choose 6 shoppers hanger_left yellowball_right hanger left yellowball FALSE set8 1 1 53f148ef-0fdd-4f4a-af02-5aeacca79f2f HEG7PC Mike NA NA NA NA NA NA NA NA NA NA NA NA 2623 87 7.25 7 f
KYX2M6-generate-shoppers-yellowball 03d8d105-8a4a-43d2-8fe9-2cf1af771413 KYX2M6 generate 11 shoppers yellowball NA NA NA yellowball NA set8 1 0 269773b4-40fe-4ca1-90ac-6f955c9eb1b1 dWAMM2 Lookit Account NA NA NA NA NA NA NA NA NA NA NA NA NA 1912 63 5.25 5 f
YV2SRA-generate-astronauts-helmet 3afac10b-b7e1-4180-8e96-d19548a5f4bc YV2SRA generate 11 astronauts helmet NA NA NA helmet NA set2 0 0 6dd153a7-715d-43b0-a03e-4e0eef2bd309 dWB4Sd Freida YV2SRA it could protect his head from astreroid helmet 00:11 1 NA NA NA NA NA NA NA NA 2757 91 7.58 7 m
T3FG37-generate-musicians-socks dc85815b-8f8c-48ea-aaa8-90dc059b6372 T3FG37 generate 11 musicians socks NA NA NA broom NA set4 0 0 a5fe8e78-94a9-45e8-b841-a477dcc89c5f dSKR4N bloera NA NA NA NA NA NA NA NA NA NA NA NA NA 2704 90 7.50 7 m
Yd3ZSU-generate-fashionshow-pokerchips 80297dc0-92de-4269-89e5-5cc40e57a9d2 Yd3ZSU generate 1 fashionshow pokerchips NA NA NA pokerchips NA set6 1 0 0f33ad1a-fb89-4ffa-b033-53d953fc8047 PMRD67 MW Yd3ZSU those could be for playing all done toy 00:39 1 NA NA NA NA NA NA NA NA 1854 61 5.08 5 f

check data

Check the data has the right dimensions, number of trials, etc.

  1. Each child contributes one session TRUE

  2. How many choose trials does each child contribute? (Target= 8). V6cM2X has 9 (replayed one trial); c2CZ4R has 4 (and no generate data)

pilotdata %>% filter(condition=="choose") %>%
  count(child_hashed_id) %>% count(n)
##   n nn
## 1 4  1
## 2 8 35
## 3 9  1
  1. How many generate trials does each child contribute? (Target= 16).
pilotdata %>% filter(condition=="generate") %>%
  count(child_hashed_id) %>% count(n)
##    n nn
## 1 15  4
## 2 16 32

Make Choose dataframes

This data combines both choose and generate data, so let’s split them up and clean up any repeated trials, etc.

For each condition, we will produce a (1) trial-level dataframe and a (2) child-level dataframe with averages.

First do so for choose:

## TRIAL-LEVEL DATA
df.choose.trials <- pilotdata %>% 
  filter(condition == "choose") %>%
  filter(!is.na(chosen_object)) %>% # remove NA responses which indicate pause, prompt replays, etc.
# select only relevant columns
  select(child_hashed_id, condition, trialnumber, 
         setID, scene, scene_binary, object_match, chosen_object, 
         chosen_object_binary, chosen_side = response_side,  chosen_is_match = match_binary,
         child_age_days:child_gender) %>%
  mutate(chosen_is_match = factor(chosen_is_match, levels=c(FALSE, TRUE),
                                   labels=c("Chose Non-match", "Chose Match")))

## AGGREGATE PER CHILD
df.choose.kids <- df.choose.trials %>%
  group_by(child_hashed_id, condition, child_age_days, child_age_months, child_age_years, child_gender) %>% # group by child-level variables
  summarize(n_choose_left = mean(chosen_side=="left"),
            n_choose_match = mean(chosen_is_match))

## AGGREGATE PER SCENE
df.choose.scenes <- df.choose.trials %>%
  group_by(setID, scene, scene_binary) %>%
  summarize(
    mean_chose_match = mean(chosen_is_match=="Chose Match", na.rm=T),
    mean_chose_obj1 = mean(chosen_object_binary, na.rm=T)
  )

Make Stimuli dataframe

We use this for plotting and other data wrangling stuff

# each of 16 objects
objects <- df.choose.trials %>%
  select(setID, object=chosen_object, object_binary = chosen_object_binary) %>%
  unique() %>% arrange(setID, object_binary)
# each of 16 scenes
scenes <- df.choose.trials %>%
  select(setID, scene, scene_binary, object_match) %>% unique() %>%
  arrange(setID, scene_binary)
# 32 row data frame
stimuli <- full_join(scenes, objects)

Make Generate dataframes

Then for generate. Also compute time to first / last idea, rate of ideas.

TODO: re-code some trials. currently using manually checked time_junyi and nideas.1

# add function to convert time into seconds (numeric), append to choose
# add variable for time to LAST idea
# create variable for RATE of ideas

getseconds <- function(time) {
  minutes= as.integer(substr(time, 1,1))
  seconds= as.integer(substr(time, 3,4))
  duration = minutes*60 + seconds
  return(duration)    
}

## TRIAL-LEVEL DATA
df.generate.trials <- pilotdata %>% 
  filter(condition == "generate") %>%
  filter(!is.na(transcript)) %>% # remove responses not yet transcribed
# select only relevant columns
  select(child_hashed_id, condition, trialnumber, 
         setID, scene, scene_binary, object_generate = images.1.id, object_match,# IVs
         transcript, time, timeToFirstIdea, nideas, # DVs
         child_age_days:child_gender) %>%
  mutate(object_is_match = object_generate == object_match) %>%
  mutate(object_is_match = factor(object_is_match, levels=c(FALSE, TRUE),
                                   labels=c("Non-Matching Object", "Match Object"))) %>%
  mutate(#timeToFirstIdea = getseconds(str_sub(time, 1, 4)),
         timeToLastIdea = getseconds(str_sub(time, -4, -1))
         )%>%
  mutate(rateOfIdeas = ifelse(nideas < 1, NA, nideas/timeToLastIdea),
         timeAvg = ifelse(nideas < 1, NA, timeToLastIdea / nideas)) %>%
  mutate(timeToFirstIdea = ifelse(timeToFirstIdea < -15, NA, timeToFirstIdea))

## AGGREGATE PER CHILD
df.generate.kids <- df.generate.trials %>%
  group_by(child_hashed_id, condition, child_age_days, child_age_months, child_age_years, child_gender) %>% # group by child-level variables
  summarize(mean_ideas = mean(nideas),
            mean_timeToFirst = mean(timeToFirstIdea))

Make Item-level choose and generate dataframes

A dataframe with 32 rows (all combinations of scenes and objects). Aggregate per item: choices, and generate metrics.

df.choose.items <- 
  df.choose.scenes %>%
  mutate(mean_chose_obj0 = 1-mean_chose_obj1) %>%
  pivot_longer(cols=c("mean_chose_obj0", "mean_chose_obj1"),
               names_to="object_binary",
               values_to="chosen_proportion") %>%
  mutate(object_binary = as.integer(substr(object_binary, 15, 15))) %>% left_join(select(stimuli, -object_match))

df.generate.items <- df.generate.trials %>%
  group_by(setID, scene, scene_binary, object_generate, object_is_match) %>%
  summarize(
    mean_nideas = mean(nideas, na.rm=T),
    mean_time1 = mean(timeToFirstIdea, na.rm=T),
    mean_rateideas = mean(rateOfIdeas, na.rm=T),
    mean_timeAvg = mean(timeAvg, na.rm=T)
  ) %>%
  left_join(rename(objects, object_generate=object))


df.items <- df.generate.items %>%
  left_join(df.choose.items) %>%
  ungroup()

Trial-level choose and generate

FINALLY,Make a dataframe with 16 rows per child (for each scene) containing:

  • scene
  • object_generate
  • idea measures (n ideas, time to first idea, rate of ideas)
  • was object chosen or not? match or not?
df.trials <- 
  df.generate.trials %>%
  select(child_hashed_id, setID, scene, scene_binary, 
         object_generate, object_match, object_is_match,
         nideas, timeToFirstIdea, rateOfIdeas, timeAvg) %>% 
  left_join(objects, by=c('setID', 'object_generate'='object')) %>%
  left_join(select(df.choose.trials, child_hashed_id, setID, scene, 
                   chosen_object, chosen_object_binary, chosen_side, chosen_is_match, 
                   child_age_days:child_gender)) %>%
  mutate(object_is_chosen = chosen_object==object_generate) %>%
  mutate(object_is_chosen = factor(object_is_chosen, levels=c(FALSE, TRUE),
                                   labels=c("Non-preferred object", "Preferred object"))) %>%
  arrange(child_hashed_id, setID, object_binary) %>%
  # now rearrange columns, clustered by meaning
  relocate(starts_with("chosen"), .after="timeAvg") %>% # move to the end 
  relocate(starts_with("object")) %>% # successively move to left
  relocate(starts_with("scene")) %>%
  relocate("setID") %>%
  relocate(starts_with("child")) # put this at the front

Participants & Data availability

The data comes from 37 children, ages 5.083 to 7.833 years (M = 6.588 years, SD = 0.918).

We have choose data from 37 children (M = 6.588 years), contributing a total of 292 trials.

For generate data, we have responses from 24 children (M = 6.521 years), contributing a total of381. However, we have to exclude 25 of these trials due to the following reasons:

df.generate.trials %>% filter(is.na(nideas)) %>% count(transcript)
##                   transcript  n
## 1      (Video couldn't load)  3
## 2    (couldn’t hear clearly)  1
## 3 it can be (video cuts off)  1
## 4                   no audio  2
## 5                 no respond  1
## 6                no response 17

Choose descriptives

We ran two counterbalanced lists, scene_binary = 0 or 1. Due to Lookit randomization + exclusions, one list has 12 participants, the other has 24 participants (25 if counting the child who only did half of all 8 trials).

df.choose.trials %>% count(scene_binary, setID) %>%
  count(scene_binary, n)
##   scene_binary  n nn
## 1            0 12  8
## 2            1 24  4
## 3            1 25  4

Overall (scene_binary x chosen_binary)

Aggregating across sets, do object choices vary by scene? (Note that this arbitrarily assigns each scene a 0/1 label)

tab<- with(df.choose.trials,
     table(scene_binary, chosen_object_binary))
tab
##             chosen_object_binary
## scene_binary   0   1
##            0  70  26
##            1  77 119
tab.fisher <- fisher.test(tab)
tab.chi <- chisq.test(tab)

We can test this contingency using a chi-square:

tab.chi
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  tab
## X-squared = 28, df = 1, p-value = 1e-07
# p<.05; there is a significant association between the scene presented, and the object chosen by the child

We can additionally assess the strength of this correlation using an Odds Ratio, i.e., relative odds of choosing object 1 given scene 1 vs scene 0. The OR is 4.14 (95% CI = 2.369 – 7.397; Fisher’s test p <.001).

Visualize

df.choose.trials %>%
  count(scene_binary, chosen_object_binary) %>%
  mutate(y_text = .05 + .9 * chosen_object_binary) %>% # 0.05, .95
  ggplot(aes(x=scene_binary)) + 
  geom_bar(aes(y=n, alpha=chosen_object_binary),
           position="fill", stat="identity", color="black")+
  geom_hline(yintercept = 0.5, linetype="dashed")+
  geom_text(aes(y=y_text, label = paste0("Object ",chosen_object_binary)), color="black")+
  scale_y_continuous(labels=scales::label_percent(), 
                     breaks=c(0, 0.5, 1),
                     name = "Proportion of choices") +
  scale_x_continuous(labels=c("Scene 0", "Scene 1"),
                     breaks=c(0, 1),
                     name=c("Scene")) +
  scale_alpha_continuous(range=c(0.1, .6)) +
  theme(legend.position = "none")

Mean match per set

t <- t.test(df.choose.scenes$mean_chose_match)

For each scene, we compute proportion of kids choosing the matched object. The mean proportion is 0.67 (SD = 0.217, range = [0.25 – 1]), which is above chance (t(15) = 12.381, p < .001).

ggplot(df.choose.scenes, aes(y=mean_chose_match)) +
  geom_hline(aes(yintercept=0.5), color="grey50", linetype="dashed") +
  stat_summary(aes(x=0), fun = "mean", geom = "point", size=3)+
  stat_summary(aes(x=0.1), fun = "mean", geom = "text", label="all",color="black")+
  stat_summary(aes(x=0), fun.data="mean_ci", color="black",
               geom="linerange")+
  geom_point(aes(color=setID, fill=setID, x=0.2+scene_binary/2), slpha=0.6, size=2, shape=21) +
  ggrepel::geom_text_repel(aes(label = scene, color=setID, x=0.2+scene_binary/2),
                           seed=42, 
                           force        = 0.4,
                           nudge_x      = 0.1,
                           direction    = "both",
                           hjust        = 0,
                           segment.size = 0.2) +
  scale_y_continuous(limits = c(0,1), labels=scales::label_percent(), 
                     name = "Chose predicted object") +
  scale_x_continuous(limits = c(-.1, 1.1), breaks = c(0.2, 0.7), labels=c("A", "B")) +
  theme(
    # axis.ticks.x = element_blank(),
    # axis.text.x  = element_blank(),
    axis.title.x = element_blank(),
    legend.position = "none"
  )

Odds ratio per set

For each set, compute odds ratio as a measure of effect size.

OR = Odds of selecting object 1 given scene 1 vs. scene 0

OR = 1 means no effect. OR > 1 means predicted direction: prefer match

df.sets <- df.choose.trials %>%
  count(setID, scene_binary, chosen_object_binary) %>%
  group_by(setID, scene_binary, chosen_object_binary) %>% 
  # mutate(n_subj = sum(n)) %>%
  ungroup() %>%
  # mutate(prop = n / n_subj) %>%
  pivot_wider(names_from=c(scene_binary, chosen_object_binary), 
              values_from=n,
              names_prefix = c('scene'),
              values_fill = 0) %>%
  mutate(odds_o1_s1 = scene1_1/scene1_0, 
         odds_o1_s0 = scene0_1/scene0_0) %>%
  mutate(oddsratio = odds_o1_s1 / odds_o1_s0)

# Display
df.sets %>% select(setID, oddsratio) %>% kable()
setID oddsratio
set1 6.000
set2 36.667
set3 1.000
set4 1.000
set5 10.154
set6 Inf
set7 23.000
set8 0.923

Visualize proportion choosing object 0/1

df.choose.trials %>%
  mutate(scene = forcats::fct_reorder(scene, scene_binary),
         chosen_object = forcats::fct_reorder(chosen_object, chosen_object_binary)) %>%
  count(setID, scene, chosen_object, chosen_object_binary) %>%
  mutate(y_n = 0.1 + 0.8 * chosen_object_binary,# 0.1, 0.9
                     y_text = -.05 + 1.1 * chosen_object_binary) %>% # -0.05, 1.05
  group_by(setID, scene) %>% mutate(n_subj = sum(n)) %>%
  ungroup() %>%
  ggplot(aes(x=scene, y=n, fill=setID)) + 
  geom_bar(aes(width = n_subj/25, # bar width reflcets data availability
               alpha=chosen_object_binary), 
           position="fill", stat="identity", color="black")+
  geom_text(aes(label = n, y=y_n), color="black") +#position=position_fill(vjust=0.7)
  geom_text(aes(label = chosen_object, y=y_text), size=2, color="black")+
  facet_wrap('setID', scales="free", nrow=2) +
  scale_y_continuous(labels=scales::label_percent(), 
                     breaks=c(0, 0.5, 1),
                     name = "Proportion of choices") +
  scale_alpha_continuous(range=c(1, 0.5)) +# dark = object 0
  theme(legend.position = "none",
        strip.text.x = element_text(size = 12, face = "bold"),
        axis.text.x = element_text(size=8, vjust=1)) # overlapping x-labels

GLM: choice vs. scene

We have three kinds of trial-level analyses, each accounting for random effects of scene/set and childID.

  1. Predict choice_binary from scene_binary. This is a generalization of the chi-square, accounting for setID and childID. Each participant contributes 8 rows.

  2. Predict chosen_is_match (T/F), accounting for scene and childID. This estimates the probability that on any given trial, children will choose the matching object. Each participant contributes 8 rows.

  3. Predict object_is_chosen (T/F) from object_is_match, accounting for scene and childID. This estimates how much more likely an object will be chosen if it is a matching object. Each participant contributes 16 rows. We’ll run this after exploring the generate data

Analysis 3 and 2, in their simplest forms, are the same. What changes is the kind of covariates we want to use. Analysis 3 – we can use object-level characteristics, such as number of ideas for either object, or which scene was being presented. In Analysis 2, we can only include variables about that choice trial, e.g., Ratio of ideas nideas_chosen / nideas_notchosen or Difference between ideas nideas_chosen - nideas_notchosen.

GLM 1: choice_binary ~ scene_binary

QN: is chosen object & scene independent, according to pre-defined A/B labels?

Predict object choice (0 or 1) based on scene (0 or 1), with random effect of Set and Child.

  • Model 1: chosen_object_binary ~ scene_binary + (1|setID) + (1|childID)
  • Model 2: chosen_object_binary ~ scene_binary + age + (1|setID) + (1|childID)

TODO: We should respect experimental design in including (1|childID), however, note that in the pilot data, there is negligible variance attributed to childID.

# Prepare data frame
df.choose.trials_regression <- df.choose.trials %>%
  mutate(age = scale(child_age_months, center=F),
         scene_binary = as.factor(scene_binary),
         chosen_object_binary = as.factor(chosen_object_binary))

# Model 1
choice01_model1 <- glmer(chosen_object_binary ~ scene_binary + (1 | setID) + (1 | child_hashed_id), 
                      data = df.choose.trials_regression, 
                      family = binomial)
choice01_model2 <- glmer(chosen_object_binary ~ scene_binary + age + (1 | setID) + (1 | child_hashed_id),
                      data = df.choose.trials_regression, 
                      family = binomial)

Test if age improves model fit (NOTE: we use age in months, center-scaled) Age is not significant.

anova(choice01_model2, choice01_model1)
## Data: df.choose.trials_regression
## Models:
## choice01_model1: chosen_object_binary ~ scene_binary + (1 | setID) + (1 | child_hashed_id)
## choice01_model2: chosen_object_binary ~ scene_binary + age + (1 | setID) + (1 | child_hashed_id)
##                 npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## choice01_model1    4 368 382   -180      360                    
## choice01_model2    5 369 387   -179      359  0.81  1       0.37

Report model summary in terms of exponentiated coefficients, i.e., odds ratios.

kable(tidy(choice01_model1, exponentiate=T, conf.int=T))
effect group term estimate std.error statistic p.value conf.low conf.high
fixed NA (Intercept) 0.336 0.117 -3.14 0.002 0.17 0.663
fixed NA scene_binary1 4.857 1.422 5.40 0.000 2.74 8.620
ran_pars child_hashed_id sd__(Intercept) 0.000 NA NA NA NA NA
ran_pars setID sd__(Intercept) 0.696 NA NA NA NA NA

Full model summary

summary(choice01_model1)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: 
## chosen_object_binary ~ scene_binary + (1 | setID) + (1 | child_hashed_id)
##    Data: df.choose.trials_regression
## 
##      AIC      BIC   logLik deviance df.resid 
##      368      382     -180      360      288 
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -2.319 -0.808 -0.366  0.803  2.729 
## 
## Random effects:
##  Groups          Name        Variance Std.Dev.
##  child_hashed_id (Intercept) 0.000    0.000   
##  setID           (Intercept) 0.485    0.696   
## Number of obs: 292, groups:  child_hashed_id, 37; setID, 8
## 
## Fixed effects:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)     -1.090      0.347   -3.14   0.0017 ** 
## scene_binary1    1.580      0.293    5.40  6.6e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## scene_bnry1 -0.596
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')

Model predictions as a plot

plot_model(choice01_model1, type="pred")
## $scene_binary

GLM 2: choice_is_match ~ 1

Predict whether child chose predicted object with random effect of scene and child.

  • Model 1: chosen_is_match ~ 1 + (1|scene) + (1|childID)
  • Model 2: chosen_is_match ~ 1 + age + (1|scene) + (1|childID)

TODO: We should respect experimental design in including (1|childID), however, note that in the pilot data, there is negligible variance attributed to childID.

# Model 1
choiceMatch_model1 <- glmer(chosen_is_match ~ 1 + (1 | scene) + (1 | child_hashed_id), 
                      data = df.choose.trials_regression, 
                      family = binomial)
choiceMatch_model2 <- glmer(chosen_is_match ~ 1 + age + (1 | scene) + (1 | child_hashed_id),
                      data = df.choose.trials_regression, 
                      family = binomial)

Test if age improves model fit (NOTE: we use age in months, center-scaled) Age is not significant.

anova(choiceMatch_model1, choiceMatch_model2)
## Data: df.choose.trials_regression
## Models:
## choiceMatch_model1: chosen_is_match ~ 1 + (1 | scene) + (1 | child_hashed_id)
## choiceMatch_model2: chosen_is_match ~ 1 + age + (1 | scene) + (1 | child_hashed_id)
##                    npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## choiceMatch_model1    3 356 367   -175      350                    
## choiceMatch_model2    4 358 373   -175      350  0.11  1       0.74

Exponentiate estimate. exp(intercept) indicates Odds of choosing match object.

kable(tidy(choiceMatch_model1, exponentiate=T, conf.int=T))
effect group term estimate std.error statistic p.value conf.low conf.high
fixed NA (Intercept) 2.322 0.677 2.89 0.004 1.31 4.11
ran_pars child_hashed_id sd__(Intercept) 0.001 NA NA NA NA NA
ran_pars scene sd__(Intercept) 0.987 NA NA NA NA NA

Full model summary

summary(choiceMatch_model1)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: chosen_is_match ~ 1 + (1 | scene) + (1 | child_hashed_id)
##    Data: df.choose.trials_regression
## 
##      AIC      BIC   logLik deviance df.resid 
##      356      367     -175      350      289 
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -3.005 -0.922  0.428  0.699  1.463 
## 
## Random effects:
##  Groups          Name        Variance Std.Dev.
##  child_hashed_id (Intercept) 6.55e-07 0.000809
##  scene           (Intercept) 9.75e-01 0.987395
## Number of obs: 292, groups:  child_hashed_id, 37; scene, 16
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)   
## (Intercept)    0.842      0.291    2.89   0.0038 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## Model failed to converge with max|grad| = 0.00422399 (tol = 0.002, component 1)

Generate descriptives

n=24 kids, 356 trials

We have 3 dependent measures for each trial:

  • Number of ideas: [0, 6]
  • Time to first idea (in seconds, if gave any idea) [-4.939, 40.932]
  • Average time per idea (in seconds, if any idea) [3.6, 45]
  • Rate of ideas (per second, if any ideas) 0.022, 0.278

Overall distributions

df.trials %>%
  summarise_at(vars(nideas, timeToFirstIdea, timeAvg, rateOfIdeas),
            list(mean=~ mean(.x, na.rm = TRUE),
                 sd= ~sd(.x, na.rm = TRUE)))
##   nideas_mean timeToFirstIdea_mean timeAvg_mean rateOfIdeas_mean nideas_sd
## 1        1.46                 2.38         12.2           0.0942     0.901
##   timeToFirstIdea_sd timeAvg_sd rateOfIdeas_sd
## 1               4.98       5.59         0.0335

negative time 1

df.trials %>% count(timeToFirstIdea < 0)
##   timeToFirstIdea < 0   n
## 1               FALSE 163
## 2                TRUE  79
## 3                  NA 139
plotdist <- function(data, dv, bw) {
  DV = enquo(dv)
  ggplot(data) + 
  geom_vline(aes(xintercept=mean(!!DV, na.rm=T)), 
             color="blue", size=1, linetype="dashed")+
  geom_histogram(aes(y = bw*after_stat(density),
  #geom_histogram(aes(y = after_stat(count / sum(count)),
                     x = !!DV),
                 binwidth=bw, position = position_dodge(0.3),
                 alpha=0.5, color="grey30") +
  scale_y_continuous(expand = expansion(mult = c(0, 0.2)),
                     labels = scales::percent_format(),
                     name="Frequency")
}
plotdist(df.trials, nideas, 1)

plotdist(df.trials, timeToFirstIdea, 1)

plotdist(df.trials, timeAvg, 1)

plotdist(df.trials, rateOfIdeas, 0.01)

Whether prompted object matches scene

Plot nideas

ggpaired(df.generate.items, x='object_is_match', y='mean_nideas',
         color='object_is_match', alpha=0.2,
         line.color='grey80',
         palette=pal_matches) +
  scale_x_discrete(labels = scales::label_wrap(10)) +
  labs(y="Avg. Number of ideas", x=NULL) +
  theme(legend.position = "none") +
  stat_compare_means(paired=T)

Plot time1

ggpaired(df.generate.items, x='object_is_match', y='mean_time1',
         color='object_is_match', alpha=0.2,
         line.color='grey80',
         palette=pal_matches) +
  scale_x_discrete(labels = scales::label_wrap(10)) +
  labs(y="Avg. time to first idea", x=NULL) +
  theme(legend.position = "none") +
  stat_compare_means(paired=T)

by subject, match

df.generate.bykid.match <- df.generate.trials %>%
  group_by(child_hashed_id, object_is_match) %>%
  summarize(
    mean_nideas = mean(nideas, na.rm=T),
    mean_time1 = mean(timeToFirstIdea, na.rm=T),
    mean_rateideas = mean(rateOfIdeas, na.rm=T),
    mean_timeAvg = mean(timeAvg, na.rm=T)
  )

df.generate.bykid.match %>%
  group_by(object_is_match) %>%
  summarise_at(vars(starts_with("mean")),
            list(mean=~ mean(.x, na.rm = TRUE),
                 sd= ~sd(.x, na.rm = TRUE)))
## # A tibble: 2 × 9
##   object_is_match     mean_nideas_mean mean_time1_mean mean_rateideas_mean
##   <fct>                          <dbl>           <dbl>               <dbl>
## 1 Non-Matching Object             1.44            2.47              0.0936
## 2 Match Object                    1.44            2.35              0.0939
## # ℹ 5 more variables: mean_timeAvg_mean <dbl>, mean_nideas_sd <dbl>,
## #   mean_time1_sd <dbl>, mean_rateideas_sd <dbl>, mean_timeAvg_sd <dbl>

Plot N IDEAS

ggpaired(df.generate.bykid.match, x='object_is_match', y='mean_nideas',
         color='object_is_match', alpha=0.2, group="child_hashed_id",
         line.color='grey80',
         palette=pal_matches) +
  scale_x_discrete(labels = scales::label_wrap(10)) +
  labs(y="Avg. Number of ideas", x=NULL) +
  theme(legend.position = "none") +
  stat_compare_means(paired=T)

Plot N IDEAS

ggpaired(df.generate.bykid.match, x='object_is_match', y='mean_time1',
         color='object_is_match', alpha=0.2,group="child_hashed_id",
         line.color='grey80',
         palette=pal_matches) +
  scale_x_discrete(labels = scales::label_wrap(10)) +
  labs(y="Avg. Time to 1st idea", x=NULL) +
  theme(legend.position = "none") +
  stat_compare_means(paired=T)

by subject, chosen

df.generate.bykid.choice <- df.trials %>%
  group_by(child_hashed_id, object_is_chosen) %>%
  summarize(
    mean_nideas = mean(nideas, na.rm=T),
    mean_time1 = mean(timeToFirstIdea, na.rm=T),
    mean_rateideas = mean(rateOfIdeas, na.rm=T),
    mean_timeAvg = mean(timeAvg, na.rm=T)
  )

df.generate.bykid.choice %>%
  group_by(object_is_chosen) %>%
  summarise_at(vars(starts_with("mean")),
            list(mean=~ mean(.x, na.rm = TRUE),
                 sd= ~sd(.x, na.rm = TRUE)))
## # A tibble: 2 × 9
##   object_is_chosen     mean_nideas_mean mean_time1_mean mean_rateideas_mean
##   <fct>                           <dbl>           <dbl>               <dbl>
## 1 Non-preferred object             1.43            2.82              0.0940
## 2 Preferred object                 1.45            2.03              0.0935
## # ℹ 5 more variables: mean_timeAvg_mean <dbl>, mean_nideas_sd <dbl>,
## #   mean_time1_sd <dbl>, mean_rateideas_sd <dbl>, mean_timeAvg_sd <dbl>

Plot N IDEAS

ggpaired(df.generate.bykid.choice, x='object_is_chosen', y='mean_nideas',
         color='object_is_chosen', alpha=0.2, group="child_hashed_id",
         line.color='grey80',
         palette=pal_chosen) +
  scale_x_discrete(labels = scales::label_wrap(10)) +
  labs(y="Avg. Number of ideas", x=NULL) +
  theme(legend.position = "none") +
  stat_compare_means(paired=T)

Plot N IDEAS

ggpaired(df.generate.bykid.choice, x='object_is_chosen', y='mean_time1',
         color='object_is_chosen', alpha=0.2,group="child_hashed_id",
         line.color='grey80',
         palette=pal_chosen) +
  scale_x_discrete(labels = scales::label_wrap(10)) +
  labs(y="Avg. Time to 1st idea", x=NULL) +
  theme(legend.position = "none") +
  stat_compare_means(paired=T)

paired t-test - BY TRIALS, IGNORE

No difference between Means?

mu1 <- df.trials %>%
  group_by(object_is_match) %>%
  summarise_at(vars(nideas, timeToFirstIdea, timeAvg, rateOfIdeas),
            list(mean=~ mean(.x, na.rm = TRUE),
                 sd= ~sd(.x, na.rm = TRUE)))
kable(mu1)
object_is_match nideas_mean timeToFirstIdea_mean timeAvg_mean rateOfIdeas_mean nideas_sd timeToFirstIdea_sd timeAvg_sd rateOfIdeas_sd
Non-Matching Object 1.46 2.44 12.2 0.094 0.864 4.92 5.34 0.034
Match Object 1.46 2.33 12.2 0.095 0.938 5.05 5.84 0.033

T-test

# WIDE data frame - one row per scene; compute ratios & differences of DVs
df.relative.match <- df.trials %>% 
  filter(!is.na(nideas)) %>%
  select(child_hashed_id, child_age_months, 
                     setID, scene, object_is_match, 
         nideas, timeToFirstIdea, timeAvg, rateOfIdeas) %>%
  mutate(object_is_match = object_is_match=="Match Object") %>%
  pivot_wider(names_from = object_is_match,
              values_from = c(nideas, timeToFirstIdea, timeAvg, rateOfIdeas)) %>%
  mutate(nideas_ratio = nideas_TRUE / nideas_FALSE,
         time1_ratio = timeToFirstIdea_TRUE / timeToFirstIdea_FALSE,
         timeAvg_ratio = timeAvg_TRUE / timeAvg_FALSE,
         rate_ratio = rateOfIdeas_TRUE / rateOfIdeas_FALSE,
         nideas_diff = nideas_TRUE - nideas_FALSE,
         time1_diff = timeToFirstIdea_TRUE - timeToFirstIdea_FALSE,
         timeAvg_diff = timeAvg_TRUE - timeAvg_FALSE,
         rate_diff = rateOfIdeas_TRUE - rateOfIdeas_FALSE
         )

# t-test of within-sub difference
mu1a <- df.relative.match %>% 
  select(-ends_with('TRUE'), -ends_with('FALSE')) %>%
  pivot_longer(cols = ends_with("diff")) %>%
  filter(value!= Inf) %>%
  group_by(name) %>% # for each measure
  nest() %>% # nest the data
  mutate(
    N = map(data, nrow),
    t_test = map(data, ~{t.test(.x$value) %>% tidy()})) %>% 
  select(-data) %>%
  unnest(cols = c(N, t_test))

kable(mu1a)
name N estimate statistic p.value parameter conf.low conf.high method alternative
nideas_diff 169 0.000 0.000 1.000 168 -0.138 0.138 One Sample t-test two.sided
time1_diff 109 -0.649 -1.096 0.275 108 -1.822 0.524 One Sample t-test two.sided
timeAvg_diff 158 0.023 0.045 0.964 157 -0.958 1.003 One Sample t-test two.sided
rate_diff 158 0.001 0.337 0.736 157 -0.005 0.007 One Sample t-test two.sided

T-test on ratios

mu1b <- df.relative.match %>% 
  select(-ends_with('TRUE'), -ends_with('FALSE')) %>%
  pivot_longer(cols = ends_with("ratio")) %>%
  filter(value!= Inf) %>%
  group_by(name) %>% # for each measure
  nest() %>% # nest the data
  mutate(
    N = map(data, nrow),
    t_test = map(data, ~{t.test(.x$value, mu=1) %>% tidy()})) %>% 
  select(-data) %>%
  unnest(cols = c(N, t_test))

kable(mu1b)
name N estimate statistic p.value parameter conf.low conf.high method alternative
nideas_ratio 164 1.06 1.481 0.140 163 0.979 1.15 One Sample t-test two.sided
time1_ratio 109 21.63 0.805 0.423 108 -29.181 72.45 One Sample t-test two.sided
timeAvg_ratio 158 1.09 1.950 0.053 157 0.999 1.19 One Sample t-test two.sided
rate_ratio 158 1.09 2.596 0.010 157 1.022 1.16 One Sample t-test two.sided

Number of ideas

ggplot(df.trials, aes(x=nideas, fill = object_is_match, color=object_is_match)) + 
  # geom_vline(data=mu1, 
  #            aes(xintercept=nideas_mean, color=object_is_match), 
  #            size=1)+
    geom_histogram(aes(y = after_stat(density)),
  #geom_histogram(aes(y = after_stat(count / sum(count)),
                 binwidth=1, position = position_dodge(0.3),
                 alpha=0.5, color="grey30") +
scale_y_continuous(expand = expansion(mult = c(0, 0.2)),
                   labels = scales::percent_format(),
                     name='Frequency') + # no space below, 10% above bars 
  scale_color_manual(values=pal_matches)+
  scale_fill_manual(values=pal_matches) +
  theme(legend.position = c(.78,.8))

Within-subjects

df.trials %>%
  filter(!is.na(nideas)) %>%
  ggpaired(x = "object_is_match", y = "nideas",
         color = "object_is_match", line.color = "gray", line.size = 0.4,
         palette = pal_matches)+
  stat_compare_means(paired = TRUE, method="t.test") +
  theme(legend.position = 'none')

Time to first idea

ggplot(df.trials, aes(x=timeToFirstIdea, fill = object_is_match)) + 
  geom_vline(data=mu1, 
             aes(xintercept=timeToFirstIdea_mean, 
                 color=object_is_match), 
             size=1, linetype="dashed")+
#geom_density(aes(y = after_stat(count)),
  geom_histogram(aes(y = .5*2*after_stat(density)), binwidth=2,
               position = position_dodge(width=0.3*2),
               alpha=0.5, color="grey30")+
  scale_y_continuous(expand = expansion(mult = c(0, 0.2)),
                     labels = scales::percent_format(),
                     name='Frequency') + # no space below, 10% above bars 
  scale_color_manual(values=pal_matches)+
  scale_fill_manual(values=pal_matches) +
  theme(legend.position = c(.8,.8))

Within-subjects

df.trials %>% count(setID, child_hashed_id, nideas > 0) %>% filter(`nideas > 0`, n ==2) %>%
  left_join(df.trials, relationship = "many-to-many") %>%
  ggpaired(x = "object_is_match", y = "timeToFirstIdea",
         color = "object_is_match", 
         line.color = "gray", line.size = 0.4,
         palette = pal_matches)+
  stat_compare_means(paired = TRUE, method="t.test") +
  theme(legend.position = 'none') +
  labs(subtitle="Time to first idea")

Rate of ideas

ggplot(df.trials, aes(x=rateOfIdeas, fill = object_is_match)) + 
  geom_vline(data=mu1, 
             aes(xintercept=rateOfIdeas_mean, 
                 color=object_is_match), 
             size=1, linetype="dashed")+
geom_histogram(aes(y = .5*.2*after_stat(density)), 
               binwidth=0.2,
               position = position_dodge(width=0.3*0.2),
               #geom_density(aes(y = after_stat(count)),
  alpha=0.5, color="grey30")+
  scale_y_continuous(expand = expansion(mult = c(0, 0.2)),
                     labels = scales::percent_format(),
                     name='Frequency') + # no space below, 10% above bars 
  scale_color_manual(values=pal_matches)+
  scale_fill_manual(values=pal_matches) +
  theme(legend.position = c(.8,.8))

Within-subjects

Avg time per idea

ggplot(df.trials, aes(x=timeAvg, fill = object_is_match)) + 
  geom_vline(data=mu1, 
             aes(xintercept=timeAvg_mean, 
                 color=object_is_match), 
             size=1, linetype="dashed")+
#geom_density(aes(y = after_stat(count)),
  geom_histogram(aes(y = .5*2*after_stat(density)), binwidth=2,
               position = position_dodge(width=0.3*2),
               alpha=0.5, color="grey30")+
  scale_y_continuous(expand = expansion(mult = c(0, 0.2)),
                     labels = scales::percent_format(),
                     name='Frequency') + # no space below, 10% above bars 
  scale_color_manual(values=pal_matches)+
  scale_fill_manual(values=pal_matches) +
  theme(legend.position = c(.9,.8))

Within-subjects

df.trials %>% count(setID, child_hashed_id, nideas > 0) %>% filter(`nideas > 0`, n ==2) %>%
  left_join(df.trials, relationship = "many-to-many") %>%
  ggpaired(x = "object_is_match", y = "timeAvg",
         color = "object_is_match", 
         line.color = "gray", line.size = 0.4,
         palette = pal_matches)+
  stat_compare_means(paired = TRUE, method="t.test") +
  theme(legend.position = 'none') +
  labs(subtitle="Avg time per idea")

Relative ratios

Given a scene, compute Match:Non-matching object

  1. Ratio for number of ideas (21 NAs, 5 Zeros, 2 Inf)

  2. Relative time to first idea. Higher = faster for this object than other (78 NAs)

  3. Relative rate of ideas. Higher = faster to generate ideas in general (29 NAs)

plotdist(filter(df.relative.match, nideas_ratio!=Inf),
         nideas_ratio, 0.2)

plotdist(df.relative.match, time1_ratio, 0.2)

plotdist(df.relative.match, rate_ratio, 0.2)

Whether prompted object was chosen by child

paired t-test

TODO: we probably want to use GLMs instead, random effect of child and scene.

Print means and SDs

mu2 <- df.trials %>%
  group_by(object_is_chosen) %>%
  summarise_at(vars(nideas, timeToFirstIdea, timeAvg, rateOfIdeas),
            list(mean=~ mean(.x, na.rm = TRUE),
                 sd= ~sd(.x, na.rm = TRUE))) %>%
  relocate(where(is.numeric), .after =where(is.character)) # alphabetically

kable(mu2)
object_is_chosen nideas_mean timeToFirstIdea_mean timeAvg_mean rateOfIdeas_mean nideas_sd timeToFirstIdea_sd timeAvg_sd rateOfIdeas_sd
Non-preferred object 1.44 2.80 12.1 0.094 0.938 5.99 5.43 0.033
Preferred object 1.48 1.98 12.2 0.094 0.866 3.75 5.75 0.034

Test differences using within-sub paired t-test

# CHOSEN VS NOT CHOSEN
df.relative.chosen <- df.trials %>% 
  filter(!is.na(nideas)) %>%
  select(child_hashed_id, child_age_months, 
                     setID, scene, object_is_chosen, 
         nideas, timeToFirstIdea,timeAvg, rateOfIdeas) %>%
  mutate(object_is_chosen = object_is_chosen=="Preferred object") %>%
  pivot_wider(names_from = object_is_chosen,
              values_from = c(nideas, timeToFirstIdea, timeAvg, rateOfIdeas)) %>%
  mutate(nideas_ratio = nideas_TRUE / nideas_FALSE,
         time1_ratio = timeToFirstIdea_TRUE / timeToFirstIdea_FALSE,
         timeAvg_ratio = timeAvg_TRUE /timeAvg_FALSE,
         rate_ratio = rateOfIdeas_TRUE / rateOfIdeas_FALSE,
         nideas_diff = nideas_TRUE - nideas_FALSE,
         time1_diff = timeToFirstIdea_TRUE - timeToFirstIdea_FALSE,
         timeAvg_diff = timeAvg_TRUE - timeAvg_FALSE,
         rate_diff = rateOfIdeas_TRUE - rateOfIdeas_FALSE)

# t-test of within-sub difference
mu2a <- df.relative.chosen %>% 
  select(-ends_with('TRUE'), -ends_with('FALSE')) %>%
  #pivot_longer(cols = ends_with("diff")) %>%
  pivot_longer(cols = ends_with("diff")) %>%
  filter(value!= Inf) %>%
  group_by(name) %>% # for each measure
  nest() %>% # nest the data
  mutate(
    N = map(data, nrow),
    t_test = map(data, ~{t.test(.x$value) %>% tidy()})) %>% 
  select(-data) %>%
  unnest(cols = c(N, t_test))

kable(mu2a)
name N estimate statistic p.value parameter conf.low conf.high method alternative
nideas_diff 169 0.012 0.170 0.865 168 -0.126 0.149 One Sample t-test two.sided
time1_diff 109 -0.561 -0.947 0.346 108 -1.736 0.613 One Sample t-test two.sided
timeAvg_diff 158 0.369 0.743 0.458 157 -0.611 1.348 One Sample t-test two.sided
rate_diff 158 -0.002 -0.782 0.435 157 -0.008 0.004 One Sample t-test two.sided

Test if ratios differ from 1

# t-test of within-sub difference
mu2b <- df.relative.chosen %>% 
  select(-ends_with('TRUE'), -ends_with('FALSE')) %>%
  pivot_longer(cols = ends_with("ratio")) %>%
  filter(value!= Inf) %>%
  group_by(name) %>% # for each measure
  nest() %>% # nest the data
  mutate(
    N = map(data, nrow),
    t_test = map(data, ~{t.test(.x$value, mu=1) %>% tidy()})) %>% 
  select(-data) %>%
  unnest(cols = c(N, t_test))

kable(mu2b)
name N estimate statistic p.value parameter conf.low conf.high method alternative
nideas_ratio 161 1.07 1.73 0.086 160 0.990 1.16 One Sample t-test two.sided
time1_ratio 109 -4.25 -1.02 0.310 108 -14.448 5.96 One Sample t-test two.sided
timeAvg_ratio 158 1.13 2.70 0.008 157 1.034 1.22 One Sample t-test two.sided
rate_ratio 158 1.06 1.61 0.108 157 0.987 1.13 One Sample t-test two.sided

Number of ideas

ggplot(df.trials, aes(x=nideas, fill = object_is_chosen, color=object_is_chosen)) + 
  # geom_vline(data=mu2, 
  #            aes(xintercept=nideas_mean, color=object_is_match), 
  #            size=1)+
    geom_histogram(aes(y = after_stat(density)),
  #geom_histogram(aes(y = after_stat(count / sum(count)),
                 binwidth=1, position = position_dodge(0.3),
                 alpha=0.5, color="grey30") +
  scale_y_continuous(expand = expansion(mult = c(0, 0.2)),
                     labels = scales::percent_format(),
                     name='Frequency') + # no space below, 10% above bars 
  scale_color_manual(values=pal_chosen)+
  scale_fill_manual(values=pal_chosen) +
  theme(legend.position = c(.8,.8))

Within-subjects

df.trials %>%
  filter(!is.na(nideas)) %>%
  ggpaired(x = "object_is_chosen", y = "nideas",
         color = "object_is_chosen", line.color = "gray", line.size = 0.4,
         palette = pal_chosen)+
  stat_compare_means(paired = TRUE, method="t.test") +
  theme(legend.position = 'none')

Time to first idea

ggplot(df.trials, aes(x=timeToFirstIdea, fill = object_is_chosen)) + 
  geom_vline(data=mu2, 
             aes(xintercept=timeToFirstIdea_mean, 
                 color=object_is_chosen), 
             size=1, linetype="dashed")+
#geom_density(aes(y = after_stat(count)),
  geom_histogram(aes(y = .5*2*after_stat(density)), binwidth=2,
               position = position_dodge(width=0.3*2),
               alpha=0.5, color="grey30")+
  scale_y_continuous(expand = expansion(mult = c(0, 0.2)),
                     labels = scales::percent_format(),
                     name='Frequency') + # no space below, 10% above bars 
  scale_color_manual(values=pal_chosen)+
  scale_fill_manual(values=pal_chosen) +
  theme(legend.position = c(.9,.8))

Within-subjects

df.trials %>% count(setID, child_hashed_id, nideas > 0) %>% filter(`nideas > 0`, n ==2) %>%
  left_join(df.trials, relationship = "many-to-many") %>%
  ggpaired(x = "object_is_chosen", y = "timeToFirstIdea",
         color = "object_is_chosen", 
         line.color = "gray", line.size = 0.4,
         palette = pal_matches)+
  stat_compare_means(paired = TRUE, method="t.test") +
  theme(legend.position = 'none') +
  labs(subtitle="Time to first idea")

Rate of ideas

ggplot(df.trials, aes(x=rateOfIdeas, fill = object_is_chosen)) + 
  # geom_vline(data=mu2, 
  #            aes(xintercept=rateOfIdeas_mean, 
  #                color=object_is_chosen), 
  #            size=1, linetype="dashed")+
geom_histogram(aes(y = .5*.2*after_stat(density)), 
               binwidth=0.2,
               position = position_dodge(width=0.3*0.2),
               #geom_density(aes(y = after_stat(count)),
  alpha=0.5, color="grey30")+
  scale_y_continuous(expand = expansion(mult = c(0, 0.2)),
                     labels = scales::percent_format(),
                     name='Frequency') + # no space below, 10% above bars 
  scale_color_manual(values=pal_chosen)+
  scale_fill_manual(values=pal_chosen) +
  theme(legend.position = c(.8,.8))

Within-subjects

Avg time per idea

ggplot(df.trials, aes(x=timeAvg, fill = object_is_chosen)) + 
  geom_vline(data=mu2, 
             aes(xintercept=timeAvg_mean, 
                 color=object_is_chosen), 
             size=1, linetype="dashed")+
#geom_density(aes(y = after_stat(count)),
  geom_histogram(aes(y = .5*2*after_stat(density)), binwidth=2,
               position = position_dodge(width=0.3*2),
               alpha=0.5, color="grey30")+
  scale_y_continuous(expand = expansion(mult = c(0, 0.2)),
                     labels = scales::percent_format(),
                     name='Frequency') + # no space below, 10% above bars 
  scale_color_manual(values=pal_chosen)+
  scale_fill_manual(values=pal_chosen) +
  theme(legend.position = c(.9,.8))

Within-subjects

df.trials %>% count(setID, child_hashed_id, nideas > 0) %>% filter(`nideas > 0`, n ==2) %>%
  left_join(df.trials, relationship = "many-to-many") %>%
  ggpaired(x = "object_is_chosen", y = "timeAvg",
         color = "object_is_chosen", 
         line.color = "gray", line.size = 0.4,
         palette = pal_chosen)+
  stat_compare_means(paired = TRUE, method="t.test") +
  theme(legend.position = 'none') +
  labs(subtitle="Avg time per idea")

Relative ratios

Given a scene, compute Match:Non-matching object

  1. Ratio for number of ideas (21 NAs, 5 Zeros, 2 Inf)

  2. Relative time to first idea. Higher = faster for this object than other (78 NAs)

  3. Relative avg time per idea. Higher = denser for this object.

  4. Relative rate of ideas. Higher = faster to generate additional ideas past idea 1 (29 NAs)

plotdist(filter(df.relative.chosen, nideas_ratio!=Inf),
         nideas_ratio, 0.2)

plotdist(df.relative.chosen, time1_ratio, 0.2)

plotdist(df.relative.chosen, timeAvg_ratio, 0.2)

plotdist(df.relative.chosen, rate_ratio, 0.2)

Visualize responses by item

For each item (scene-object pair), let’s visualize the distributions of various outcome measures.

First let’s write a function that will generate the same kind of boxplot figure.

makeboxplots <- function(data, dv, groupvar, palette, xv=scene) {
  XV = enquo(xv)
  DV = enquo(dv)
  GROUP = enquo(groupvar)
  ggplot(data, aes(x = !!XV, y = !!DV, color = !!GROUP, fill = !!GROUP)) +
  geom_boxplot(position = position_dodge(width=0.8), alpha=0.2) +
  geom_point(alpha=0.5,
             position = position_jitterdodge(jitter.width = 0.5, jitter.height = 0,
                                             dodge.width=0.8)) +
  scale_color_manual(values=palette)+
  scale_fill_manual(values=palette)+
  theme(axis.text.x = element_text(angle = 90, hjust=1, vjust=0.5),
        legend.position = "none") +
  guides(colour = guide_legend(nrow = 1))
}

Match / Non-Match

Number of ideas

df.trials %>% 
  mutate(scene = fct_reorder(scene, setID)) %>%
makeboxplots(., nideas, object_is_match, pal_matches)

Time to first idea

df.trials %>% 
  mutate(scene = fct_reorder(scene, setID)) %>%
makeboxplots(., timeToFirstIdea, object_is_match, pal_matches)

Average time per idea

df.trials %>% 
  mutate(scene = fct_reorder(scene, setID)) %>%
  makeboxplots(., timeAvg, object_is_match, pal_matches)

Rate of ideas

Inversely, number of ideas per second

df.trials %>% 
  mutate(scene = fct_reorder(scene, setID)) %>%
  makeboxplots(.,rateOfIdeas, object_is_match, pal_matches)

Chosen/Non-chosen object

Number of ideas

df.trials %>% 
  mutate(scene = fct_reorder(scene, setID)) %>%
makeboxplots(., nideas, object_is_chosen, pal_chosen)

Time to first idea

df.trials %>% 
  mutate(scene = fct_reorder(scene, setID)) %>%
makeboxplots(., timeToFirstIdea, object_is_chosen, pal_chosen)

Average time per idea

df.trials %>% 
  mutate(scene = fct_reorder(scene, setID)) %>%
  makeboxplots(., timeAvg, object_is_chosen, pal_chosen)

Rate of ideas

Inversely, number of ideas per second

df.trials %>% 
  mutate(scene = fct_reorder(scene, setID)) %>%
  makeboxplots(., rateOfIdeas, object_is_chosen, pal_chosen)

Age effects?

Continuous, months

Older kids say more ideas

ggscatter(df.trials, 
          x = "child_age_months", y = "nideas",
          add = "reg.line", conf.int = TRUE,cor.coef = TRUE,
          size=2, alpha=0.3) +
  scale_x_continuous(breaks=c(60, 72, 84, 96),
                     labels=c(5, 6, 7, 8),
                     name="Age (years)")

time to first ideas

ggscatter(df.trials, 
          x = "child_age_months", y = "timeToFirstIdea",
          add = "reg.line", conf.int = TRUE,cor.coef = TRUE,
          size=2, alpha=0.3) +
  scale_x_continuous(breaks=c(60, 72, 84, 96),
                     labels=c(5, 6, 7, 8),
                     name="Age (years)")

## By match / non-math

# ggscatter(df.trials, 
#           x = "child_age_months", y = "timeToFirstIdea",
#           add = "reg.line",conf.int = TRUE,cor.coef = TRUE,
#           color = "object_is_match", size = 3, alpha = 0.6,
#  palette = pal_matches
# )

## By chosen / non-chosen

# ggscatter(df.trials, 
#           x = "child_age_months", y = "timeToFirstIdea",
#           add = "reg.line", conf.int = TRUE,cor.coef = TRUE,
#           color = "object_is_chosen", size = 3, alpha = 0.6,
#  palette = pal_chosen
# )

rate of ideas (number of ideas per second)

ggscatter(df.trials, 
          x = "child_age_months", y = "rateOfIdeas",
          add = "reg.line", conf.int = TRUE,cor.coef = TRUE,
          size=2, alpha=0.3) +
  scale_x_continuous(breaks=c(60, 72, 84, 96),
                     labels=c(5, 6, 7, 8),
                     name="Age (years)")

Age X chosen/non-chosen

Older children generate more ideas for both chosen & non-chosen objects

ggscatter(df.trials, 
          x = "child_age_months", y = "nideas",
          color = "object_is_chosen",
          add = "reg.line", conf.int = TRUE,
          size=3, alpha=0.4) +
  stat_cor(aes(color = object_is_chosen), label.x = 60) +
  scale_color_manual(values = pal_chosen) +
  scale_fill_manual(values = pal_chosen)

df.trials %>%
  filter(!is.na(nideas)) %>%
  ggpaired(x = "object_is_match", y = "nideas",
         color = "object_is_match", line.color = "gray", line.size = 0.4,
         palette = pal_matches, facet.by = "child_age_years")+
  stat_compare_means(paired = TRUE, method="t.test") +
    theme(axis.text.x = element_text(angle = 90, hjust=1, vjust=0.5))

Fatigue / Order effects?

Number of ideas

ggplot(df.generate.trials, aes(x = trialnumber, y = nideas,
                               color=trialnumber)) +
  geom_violin(aes(group=trialnumber)) + 
  geom_smooth(method="lm", color="orange")+
  geom_point(alpha=0.5,
             position = position_jitter(width = 0.3, height = 0)) +
  scale_x_continuous(limits = c(1,16.5), breaks=c(1,4,8,12,16)) +
  theme(axis.text.x = element_text(angle = 90, hjust=1, vjust=0.5),
        legend.position = 'none') +
  guides(colour = guide_legend(nrow = 1))

ggscatter(df.generate.trials, 
          x = "trialnumber", y = "timeToFirstIdea",
          add = "reg.line",conf.int = TRUE,cor.coef = TRUE,
          color = "object_is_match", size = 3, alpha = 0.6,
 palette = pal_matches
)

# ggplot(df.generate.trials, aes(x = trialnumber, y = timeToFirstIdea,
#                                color=trialnumber)) +
#   geom_violin(aes(group=trialnumber)) + 
#   geom_smooth(method="lm", color="orange")+
#   geom_point(alpha=0.5,
#              position = position_jitter(width = 0.3, height = 0)) +
#   scale_x_continuous(limits = c(1,16.5), breaks=c(1,4,8,12,16)) +
#   theme(axis.text.x = element_text(angle = 90, hjust=1, vjust=0.5),
#         legend.position = 'none') +
#   guides(colour = guide_legend(nrow = 1))

Pairwise Correlations between measures

Each point represents a scene-object pair. For each scene, we compute chosen_proportion (how often that object was chosen for this scene) and correlate it against each generate DV, see bottom row of scatter plots.

library(GGally)
ggpairs(
  select(df.items, 
         mean_nideas, mean_time1, mean_rateideas, mean_timeAvg, 
         chosen_proportion),
  lower = list(continuous = wrap("smooth", alpha = 0.3))) +
  theme_bw()

Print correlation tables (skipped, shown in plot)

GLM 3: object_is_chosen ~ ???

This is the 3rd kind of analysis as noted previously.

visualizations - paired t-test

N Ideas , Match / Non-match

ggplot(df.trials, aes(x = object_is_match, y = nideas, 
                      color = object_is_match, fill = object_is_match)) +
  geom_violin(alpha=0.2) +
  geom_point(alpha=0.5,
             position = position_jitterdodge(jitter.width = 1, jitter.height = 0.2,
                                             dodge.width=0.8)) +
  scale_color_manual(values=pal_matches)+
  scale_fill_manual(values=pal_matches)+
  theme(axis.title.x=element_blank(),
        legend.position = "none") +
  stat_compare_means()

N Ideas, Chosen / Not

ggplot(df.trials, aes(x = object_is_chosen, y = nideas, 
                      color = object_is_chosen, fill = object_is_chosen)) +
  geom_violin(alpha=0.2) +
  geom_point(alpha=0.5,
             position = position_jitterdodge(jitter.width = 1, jitter.height = 0.2,
                                             dodge.width=0.8)) +
  scale_color_manual(values=pal_chosen)+
  scale_fill_manual(values=pal_chosen)+
  theme(axis.title.x=element_blank(),
        legend.position = "none") +
  stat_compare_means(label.x = 1.5)

Time 1, Match / Non-match

ggplot(df.trials, aes(x = object_is_match, y = timeToFirstIdea, 
                      color = object_is_match, fill = object_is_match)) +
  geom_violin(alpha=0.2) +
  geom_point(alpha=0.5,
             position = position_jitterdodge(jitter.width = 1, jitter.height = 0.2,
                                             dodge.width=0.8)) +
  scale_color_manual(values=pal_matches)+
  scale_fill_manual(values=pal_matches)+
  theme(axis.title.x=element_blank(),
        legend.position = "none") +
  stat_compare_means()

Chosen / Not

ggplot(df.trials, aes(x = object_is_chosen, y = timeToFirstIdea, 
                      color = object_is_chosen, fill = object_is_chosen)) +
  geom_violin(alpha=0.2) +
  geom_point(alpha=0.5,
             position = position_jitterdodge(jitter.width = 1, jitter.height = 0.2,
                                             dodge.width=0.8)) +
  scale_color_manual(values=pal_chosen)+
  scale_fill_manual(values=pal_chosen)+
  theme(axis.title.x=element_blank(),
        legend.position = "none") +
  stat_compare_means(label.x = 1.5)

## trial-level data

Match / non-match sig.

Children are more likely to choose objects that match the given scene, controlling for age, and random effects of childID and object_label.

object_is_chosen ~ 1 + object_is_match + child_age_years + (1 | object_generate) + (1 | child_hashed_id)

m.match <- glmer(object_is_chosen ~ 1 + object_is_match + child_age_years + (1 | object_generate) + (1 | child_hashed_id),
                      data = df.trials, 
                      family = binomial)
summary(m.match)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: object_is_chosen ~ 1 + object_is_match + child_age_years + (1 |  
##     object_generate) + (1 | child_hashed_id)
##    Data: df.trials
## 
##      AIC      BIC   logLik deviance df.resid 
##      466      486     -228      456      376 
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -2.902 -0.803  0.321  0.802  2.802 
## 
## Random effects:
##  Groups          Name        Variance Std.Dev.
##  child_hashed_id (Intercept) 3.90e-07 0.000625
##  object_generate (Intercept) 8.44e-01 0.918495
## Number of obs: 381, groups:  child_hashed_id, 24; object_generate, 16
## 
## Fixed effects:
##                             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                   -1.033      0.939   -1.10     0.27    
## object_is_matchMatch Object    1.932      0.287    6.72  1.8e-11 ***
## child_age_years                0.011      0.137    0.08     0.94    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) ob__MO
## objct_s_mMO -0.154       
## child_g_yrs -0.949  0.001
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## Model failed to converge with max|grad| = 0.00750504 (tol = 0.002, component 1)

Report standardized regression coefficients (odds ratios)

kable(tidy(m.match, exponentiate=T, conf.int=T))
effect group term estimate std.error statistic p.value conf.low conf.high
fixed NA (Intercept) 0.356 0.334 -1.10 0.271 0.057 2.24
fixed NA object_is_matchMatch Object 6.905 1.984 6.72 0.000 3.932 12.13
fixed NA child_age_years 1.011 0.139 0.08 0.936 0.773 1.32
ran_pars child_hashed_id sd__(Intercept) 0.001 NA NA NA NA NA
ran_pars object_generate sd__(Intercept) 0.918 NA NA NA NA NA

NIDEAS n.s.

object_is_chosen ~ 1 + nideas + (1 | object_generate) + (1 | child_hashed_id)

No effect, even without controlling for age.

m.nidea <- glmer(object_is_chosen ~ 1 + nideas + (1 | object_generate) + (1 | child_hashed_id),
                      data = df.trials, 
                      family = binomial)
summary(m.nidea)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: object_is_chosen ~ 1 + nideas + (1 | object_generate) + (1 |  
##     child_hashed_id)
##    Data: df.trials
## 
##      AIC      BIC   logLik deviance df.resid 
##      486      502     -239      478      352 
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -1.939 -0.884  0.517  0.882  1.729 
## 
## Random effects:
##  Groups          Name        Variance Std.Dev.
##  child_hashed_id (Intercept) 0.000    0.000   
##  object_generate (Intercept) 0.448    0.669   
## Number of obs: 356, groups:  child_hashed_id, 24; object_generate, 16
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept)  0.03016    0.27165    0.11     0.91
## nideas       0.00433    0.12500    0.03     0.97
## 
## Correlation of Fixed Effects:
##        (Intr)
## nideas -0.672
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')

Time1 n.s.

object_is_chosen ~ 1 + timeToFirstIdea + (1 | object_generate) + (1 | child_hashed_id)

m.time1 <- glmer(object_is_chosen ~ 1 + timeToFirstIdea + (1 | object_generate) + (1 | child_hashed_id),
                      data = df.trials, 
                      family = binomial)
summary(m.time1)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: object_is_chosen ~ 1 + timeToFirstIdea + (1 | object_generate) +  
##     (1 | child_hashed_id)
##    Data: df.trials
## 
##      AIC      BIC   logLik deviance df.resid 
##      330      344     -161      322      238 
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -2.025 -0.909  0.491  0.868  1.798 
## 
## Random effects:
##  Groups          Name        Variance Std.Dev.
##  child_hashed_id (Intercept) 0.000    0.000   
##  object_generate (Intercept) 0.553    0.743   
## Number of obs: 242, groups:  child_hashed_id, 17; object_generate, 16
## 
## Fixed effects:
##                 Estimate Std. Error z value Pr(>|z|)
## (Intercept)       0.0929     0.2409    0.39     0.70
## timeToFirstIdea  -0.0183     0.0290   -0.63     0.53
## 
## Correlation of Fixed Effects:
##             (Intr)
## timeTFrstId -0.282
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')

Rate n.s.

chosen_is_match ~ 1 + rateOfIdeas + (1 | object_generate) + (1 | child_hashed_id)

m.rate <- glmer(object_is_chosen ~ 1 + rateOfIdeas + (1 | object_generate) + (1 | child_hashed_id),
                      data = df.trials, 
                      family = binomial)
summary(m.rate)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: object_is_chosen ~ 1 + rateOfIdeas + (1 | object_generate) +  
##     (1 | child_hashed_id)
##    Data: df.trials
## 
##      AIC      BIC   logLik deviance df.resid 
##      468      483     -230      460      338 
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -1.931 -0.920  0.518  0.884  1.712 
## 
## Random effects:
##  Groups          Name        Variance Std.Dev.
##  child_hashed_id (Intercept) 0.000    0.000   
##  object_generate (Intercept) 0.445    0.667   
## Number of obs: 342, groups:  child_hashed_id, 24; object_generate, 16
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept)    0.228      0.385    0.59     0.55
## rateOfIdeas   -1.881      3.475   -0.54     0.59
## 
## Correlation of Fixed Effects:
##             (Intr)
## rateOfIdeas -0.851
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')

Ratio DVs

First prepare a different data frame – per trial, add relative DVs

df.glm <- df.trials %>%
  left_join(df.relative.chosen) %>%
  left_join(select(df.trials, child_hashed_id, setID, scene,
                   object_generate, object_is_match, object_is_chosen))# %>%
  #mutate(object_is_chosen = ifelse(object_is_chosen=="Preferred object", 1, 0))

nideas ratio n.s.

object_is_chosen ~ 1 + nideas_ratio + (1 | object_generate) + (1 | child_hashed_id)

Time 1 ratio n.s.

object_is_chosen ~ 1 + time1_ratio + (1 | object_generate) + (1 | child_hashed_id)

m.time1Ratio <- glmer(object_is_chosen ~ 1 + time1_ratio + 
                       (1 | object_generate) + (1 | child_hashed_id),
                      data = df.glm, 
                      family = binomial)
summary(m.time1Ratio)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: object_is_chosen ~ 1 + time1_ratio + (1 | object_generate) +  
##     (1 | child_hashed_id)
##    Data: df.glm
## 
##      AIC      BIC   logLik deviance df.resid 
##      299      312     -145      291      214 
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -1.915 -0.908  0.000  0.908  1.915 
## 
## Random effects:
##  Groups          Name        Variance Std.Dev.
##  child_hashed_id (Intercept) 0.000    0.000   
##  object_generate (Intercept) 0.587    0.766   
## Number of obs: 218, groups:  child_hashed_id, 17; object_generate, 16
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.24e-06   2.41e-01       0        1
## time1_ratio 4.02e-08   2.67e-03       0        1
## 
## Correlation of Fixed Effects:
##             (Intr)
## time1_ratio 0.051 
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')

GLM4: Item-level proportion chosen ***

Match sig.

Whether object is a match?

chosen_proportion ~ object_is_match + (1|scene)

mitem.match <- lmer(chosen_proportion ~ object_is_match + (1|scene),
                      data=df.items)
anova(mitem.match) # Match significant!!
## Analysis of Variance Table
##                 npar Sum Sq Mean Sq F value
## object_is_match    1  0.928   0.928    19.8
tidy(mitem.match) %>% kable()
effect group term estimate std.error statistic
fixed NA (Intercept) 0.330 0.054 6.09
fixed NA object_is_matchMatch Object 0.341 0.077 4.45
ran_pars scene sd__(Intercept) 0.000 NA NA
ran_pars Residual sd__Observation 0.217 NA NA

Avg nideas, n.s.

Scatter plot

ggplot(df.items, aes(x=mean_nideas, y=chosen_proportion)) +
  geom_smooth(method="lm")+
  geom_point(size=3, alpha=0.5) +
  stat_cor(label.x=1.9, label.y=0.22) +
    labs(x="Avg. n_ideas", y="Proportion chosen")

chosen_proportion ~ mean_nideas + (1|scene)

How many ideas were produced? n.s.

mitem.nidea <- lmer(chosen_proportion ~ mean_nideas + (1|scene),
                      data=df.items)
anova(mitem.nidea)
## Analysis of Variance Table
##             npar Sum Sq Mean Sq F value
## mean_nideas    1 0.0729  0.0729    0.97

Color by matching object

ggplot(df.items, aes(x=mean_nideas, y=chosen_proportion, 
                     color=object_is_match, fill=object_is_match)) +
  geom_smooth(method="lm", alpha=0.2)+
  geom_point(size=3, alpha=0.5) +
  stat_cor(label.x=1.9, label.y=c(0.3, 0.4)) +
  scale_y_continuous(labels=scales::percent_format())+
  scale_color_manual(values=pal_matches) +
  scale_fill_manual(values=pal_matches) +
  theme(legend.position = "none") +
  labs(x="Avg. n_ideas", y="Proportion chosen")

Match sig., controlling for n_ideas

chosen_proportion ~ object_is_match + mean_nideas +(1|scene)

mitem.match.idea <- lmer(chosen_proportion ~ object_is_match + mean_nideas +(1|scene),
                      data=df.items)
tidy(mitem.match.idea) %>% kable()
effect group term estimate std.error statistic
fixed NA (Intercept) 0.166 0.202 0.822
fixed NA object_is_matchMatch Object 0.335 0.077 4.332
fixed NA mean_nideas 0.109 0.129 0.841
ran_pars scene sd__(Intercept) 0.000 NA NA
ran_pars Residual sd__Observation 0.218 NA NA

Time to first idea, sig.

Scatter plot

ggplot(df.items, aes(x=mean_time1, y=chosen_proportion)) +
  geom_smooth(method="lm")+
  geom_point(size=3, alpha=0.5) +
  stat_cor(label.x=15) +
    labs(x="Avg. Time to first idea (seconds)", y="Proportion chosen")

chosen_proportion ~ mean_time1 + (1|scene)

mitem.time1 <- lmer(chosen_proportion ~ mean_time1 + (1|scene),
                      data=df.items)
tidy(mitem.time1) %>% kable()
effect group term estimate std.error statistic
fixed NA (Intercept) 0.674 0.068 9.91
fixed NA mean_time1 -0.074 0.023 -3.27
ran_pars scene sd__(Intercept) 0.000 NA NA
ran_pars Residual sd__Observation 0.240 NA NA

Color by matching object

ggplot(df.items, aes(x=mean_time1, y=chosen_proportion, 
                     color=object_is_match, fill=object_is_match)) +
  geom_smooth(method="lm", alpha=0.2)+
  geom_point(size=3, alpha=0.5) +
  stat_cor(label.x=16, label.y=c(0.85, 0.95)) +
  scale_y_continuous(labels=scales::percent_format())+
  scale_color_manual(values=pal_matches) +
  scale_fill_manual(values=pal_matches) +
  theme(legend.position = "none") +
  labs(x="Avg. Time to first idea (seconds)", y="Proportion chosen")

Time1 + match or not: Time1 sig, Match sig.

chosen_proportion ~ mean_time1 + object_is_match + (1|scene)

mitem.match.time1 <- lmer(chosen_proportion ~ mean_time1 + object_is_match + (1|scene),
                      data=df.items)
tidy(mitem.match.time1) %>% kable()
effect group term estimate std.error statistic
fixed NA (Intercept) 0.488 0.069 7.05
fixed NA mean_time1 -0.058 0.018 -3.15
fixed NA object_is_matchMatch Object 0.296 0.069 4.31
ran_pars scene sd__(Intercept) 0.000 NA NA
ran_pars Residual sd__Observation 0.190 NA NA

Rate of ideas? significant chosen_proportion ~ mean_rate + (1|scene)

mitem.rate <- lmer(chosen_proportion ~ mean_rateideas + (1|scene),
                      data=df.items)
tidy(mitem.rate)
## # A tibble: 4 × 6
##   effect   group    term            estimate std.error statistic
##   <chr>    <chr>    <chr>              <dbl>     <dbl>     <dbl>
## 1 fixed    <NA>     (Intercept)       -0.607     0.424     -1.43
## 2 fixed    <NA>     mean_rateideas    11.7       4.47       2.63
## 3 ran_pars scene    sd__(Intercept)    0        NA         NA   
## 4 ran_pars Residual sd__Observation    0.252    NA         NA

Match controlling for rate of ideas? Yes. Avg rate not predictive.

mitem.match.rate <- lmer(chosen_proportion ~ object_is_match + mean_rateideas + (1|scene),
                      data=df.items)
tidy(mitem.match.rate)
## # A tibble: 5 × 6
##   effect   group    term                        estimate std.error statistic
##   <chr>    <chr>    <chr>                          <dbl>     <dbl>     <dbl>
## 1 fixed    <NA>     (Intercept)                   -0.421    0.347      -1.21
## 2 fixed    <NA>     object_is_matchMatch Object    0.302    0.0742      4.07
## 3 fixed    <NA>     mean_rateideas                 8.16     3.73        2.19
## 4 ran_pars scene    sd__(Intercept)                0       NA          NA   
## 5 ran_pars Residual sd__Observation                0.204   NA          NA